# -*- coding:utf-8 -*-
import pymongo
import sys
import mongoDB

# Day to import, taken verbatim from the first command-line argument.
# Read before the connection below is opened, so a missing argument
# fails without touching the database helper.
statDay = sys.argv[1]

# Directory and file-name suffix of the merged access log; the input path is
# assembled further down as mergeDir + "/" + <day> + subFix.
mergeDir, subFix = "merge_dmarket_access_log", "-dmarket_access_log"

# Name handed to the project's mongoDB helper, and the handle it returns.
collections = "dmarket_access_log"
collection = mongoDB.connMongo(collections)

def convert2unicode(mydict):
    """Return a copy of ``mydict`` whose string values are clean UTF-8 bytes.

    Despite the name, the result holds UTF-8 *encoded* byte strings, not
    unicode objects. Each byte-string value is decoded as UTF-8 with
    undecodable bytes silently dropped, then re-encoded, so the returned
    value is always valid UTF-8. Keys are copied unchanged.

    Text (unicode) values are encoded to UTF-8 directly instead of going
    through ``.decode`` first, which on Python 2 triggers an implicit ASCII
    encode and raises on any non-ASCII character. Values that are not
    strings at all are copied through untouched rather than raising
    ``AttributeError``.

    :param mydict: mapping of field name -> value.
    :return: a new dict; ``mydict`` itself is not modified.
    """
    text_type = type(u'')  # ``unicode`` on Python 2, ``str`` on Python 3
    tmpDict = {}
    for k, v in mydict.items():
        if isinstance(v, bytes):
            # Round-trip through text to strip invalid UTF-8 sequences.
            v = v.decode('utf-8', 'ignore').encode('utf-8')
        elif isinstance(v, text_type):
            v = v.encode('utf-8', 'ignore')
        tmpDict[k] = v
    return tmpDict

day = statDay
fileDir = mergeDir + "/" + str(day) + subFix

# Lines from this client IP, or requesting this path, are skipped and never
# stored.
_SKIPPED_IP = '114.67.23.48'
_SKIPPED_BASE_URL = '/otn/leftTicket/log'


def _parse_access_line(line, day):
    """Turn one access-log line into a flat dict ready for insertion.

    The line is split on double quotes; segment 0 carries the client IP and
    timestamp, segment 1 the request line, segment 5 the user agent and
    segment 8 the response time.

    :param line: one raw line of the access log.
    :param day: day being imported; stored as ``str(day)`` under ``'day'``.
    :return: a new dict for this line, or ``None`` when the line is filtered
        out (skipped client IP or skipped base URL).
    :raises IndexError: when the line has fewer fields than expected.
    """
    logStr = line.split("\"")

    ipDate = logStr[0].split(" ")
    ip = ipDate[0]  # 36.149.78.36
    if ip == _SKIPPED_IP:
        return None

    # A fresh dict per line: nothing from a skipped or failed line can leak
    # into the next stored record.
    record = {'ip': ip, 'day': str(day)}
    record['request_time'] = ipDate[3].split(":", 1)[1]  # 23:57:25
    record['response_time'] = logStr[8]  # 0.001

    oriUrl = logStr[1].split(" ")[1]
    urls = oriUrl.split("?", 1)
    baseUrl = urls[0]  # /jump/normal
    if baseUrl == _SKIPPED_BASE_URL:
        return None
    record['base_url'] = baseUrl

    # Query string, e.g. u=http%3A%2F%2Fmp.weixin.qq.com%2Fs...&yc_cmc=16c8ea...
    # NOTE(review): parameters are stored under their own names, so a
    # parameter called e.g. "ip" or "day" overwrites the field set above.
    query = urls[1] if len(urls) > 1 else ''
    for pair in query.split("&"):
        # Split on the first "=" only so values that themselves contain
        # "=" are kept whole; a missing "=" yields an empty value.
        key, _, value = pair.partition("=")
        if not key:
            # No query string, "&&", or "=value": do not store a field
            # with an empty name.
            continue
        record[key] = value

    userAgentArr = logStr[5].split(" ")
    if len(userAgentArr) > 15:
        record['system'] = userAgentArr[2]  # Android/iOS
        record['version'] = userAgentArr[3]  # 6.0
        record['corp'] = userAgentArr[4] + userAgentArr[5]  # HUAWEINXT-AL10
        uaArr = userAgentArr[15].split("/")
        imei = ''
        if len(uaArr) > 5:
            imei = uaArr[5].split("(")[0]  # 861331032671098
        record['imei'] = imei
    return record


# ``with`` closes the log file even if iteration itself fails.
with open(fileDir, "r") as f:
    for line in f:
        try:
            d = _parse_access_line(line, day)
            if d is None:
                continue
            collection.insert(convert2unicode(d), check_keys=False)
        except Exception as e:
            # Best effort: report the bad line's error and keep importing.
            # Print the concrete exception type, not the Exception base class.
            print("%s : %s" % (type(e), e))
